# https://www.meishij.net/zuofa/guangdongchangfen_6.html

import re

# https://www.meishij.net/xiaochi/guangdongxiaochi/p1/
import requests

# HTTP request headers sent with every requests.get call in this file.
head_data = {
    # A dict; it needs to contain at least one key-value pair.
    # The User-Agent value is a desktop Chrome browser identification string.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36'
}


def get_main_page(url, timeout=10):
    """Fetch one recipe listing page and process every recipe it links to.

    The page is downloaded with the module-level ``head_data`` headers. The
    ``href`` of each anchor carrying ``class="list_s2_item_img"`` is then
    handed to ``sub_page`` in document order.

    Args:
        url: Address of the listing page to download.
        timeout: Seconds to wait for the server. Without a timeout
            ``requests`` may block indefinitely on a stalled connection.

    Returns:
        None.

    Raises:
        requests.exceptions.RequestException: If the download fails or
            times out.
    """
    response = requests.get(url, headers=head_data, timeout=timeout)

    # ``[^"]*`` keeps the capture inside a single href attribute. A lazy
    # ``.*?`` could start at an earlier ``<a href="`` on the same line and
    # swallow unrelated markup up to the wanted class attribute.
    recipe_urls = re.findall(
        r'<a href="([^"]*)" class="list_s2_item_img"', response.text)

    for recipe_url in recipe_urls:
        sub_page(recipe_url)


def sub_page(sub_url, timeout=10):
    """Fetch one recipe page and print the ingredient entries found in it.

    Two lists are printed, one after the other. Each element is a
    ``(link_text, trailing_text)`` tuple taken from a
    ``<strong><a ...>link_text</a>trailing_text</strong>`` fragment:

    1. entries whose link points under ``/shicaizuofa/``;
    2. entries whose link path, directly under the site root, contains no
       lowercase ASCII letters.

    Args:
        sub_url: Address of the recipe page to download.
        timeout: Seconds to wait for the server. Without a timeout
            ``requests`` may block indefinitely on a stalled connection.

    Returns:
        None; the results are only printed.

    Raises:
        requests.exceptions.RequestException: If the download fails or
            times out.
    """
    response = requests.get(sub_url, headers=head_data, timeout=timeout)
    page_text = response.text

    # Entries linked through the /shicaizuofa/ section of the site.
    main_em = re.findall(
        r'<strong><a target="_blank" href="https://www.meishij.net/shicaizuofa/.*?/">(.*?)</a>(.*?)</strong>',
        page_text)
    print(main_em)

    # Entries linked straight under the site root; ``[^a-z]+`` excludes
    # paths that contain lowercase ASCII letters (such as "shicaizuofa").
    others_em = re.findall(
        r'<strong><a target="_blank" href="https://www.meishij.net/[^a-z]+">(.*?)</a>(.*?)</strong>',
        page_text)
    print(others_em)


# Crawl listing pages p1 through p5 of the category, in order. The URLs are
# built up front, then each one is handed to get_main_page.
listing_urls = [
    f'https://www.meishij.net/xiaochi/guangdongxiaochi/p{i}/'
    for i in range(1, 6)
]
for url in listing_urls:
    get_main_page(url)

# "shicaizuofa" = 食材做法 ("ingredient preparation"). Sample markup whose links sit under /shicaizuofa/:
# <strong><a target="_blank" href="https://www.meishij.net/shicaizuofa/zhanmifen/">粘米粉</a>250克</strong>
# <strong><a target="_blank" href="https://www.meishij.net/shicaizuofa/chengfen/">澄面</a>75克</strong>

# Sample markup whose links sit directly under the site root with a non-ASCII path:
# <strong><a target="_blank" href="https://www.meishij.net/花生油">油</a>15克</strong>
# <strong><a target="_blank" href="https://www.meishij.net/精盐">盐</a>4克</strong>
